Data Exploration

Load the dataset and pre-process the data.
load HCVEgyptData.mat
disp(HCV)
    Age    Gender    BMI    Fever    NauseaVomting    Headache    Diarrhea    Fatiguegeneralizedboneache    Jaundice    Epigastricpain    WBC    RBC    HGB    Plat    AST1    ALT1    ALT4    ALT12    ALT24    ALT36    ALT48    ALTafter24w    RNABase    RNA4    RNA12    RNAEOT    RNAEF    BaselinehistologicalGrading    Baselinehistologicalstaging
    (1385×29 table of patient records; the full row-by-row listing printed by disp(HCV) is omitted)
class(HCV)
ans = 'table'
size(HCV)
ans = 1×2
1385 29
% No missing values
sum(sum(ismissing(HCV)))
ans = 0
% Dealing only with two Stages - Advanced Fibrosis and Moderate Fibrosis
HCV.Baselinehistologicalstaging(HCV.Baselinehistologicalstaging<3)=0;
HCV.Baselinehistologicalstaging(HCV.Baselinehistologicalstaging>2)=1;
% Categorical to Binary
for i=[2,4:10]
HCV.(i)(HCV.(i)==1)=0; %Absence
HCV.(i)(HCV.(i)==2)=1; %Presence
end
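An equivalent vectorized recoding (a small sketch; it assumes the symptom columns still hold the raw 1/2 codes, so it would replace the loop rather than run after it):
% HCV{:, [2,4:10]} = HCV{:, [2,4:10]} - 1;   % 1 -> 0 (absence), 2 -> 1 (presence); shown commented out so it is not applied twice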
% Create array of the data
HCV_array=table2array(HCV);
The dataset contains two types of variables - categorical and continuous numeric.
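A quick check of the stored classes (a minimal sketch; after loading, every variable is expected to be numeric, with the categorical ones simply encoded as integer codes):
varClasses = varfun(@class, HCV, 'OutputFormat', 'cell');  % class of each table variable
unique(varClasses)  % expected to return just {'double'}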
%Summary Statistics of continuous/numeric variables
cont_array=HCV_array(:,[1,3,11:27])
cont_array = 1385×19
   56    35     7425    4248807    14    112132     99     84     52    109     81      5      5      5     655330     634536     288194          5         5
   46    29    12101    4429425    10    129367     91    123     95     75    113     57    123     44      40620     538635     637056     336804     31085
   57    33     4178    4621191    12    151522    113     49     95    107    116      5      5      5     571148     661346          5     735945    558829
   49    33     6490    4794631    10    146457     43     64    109     80     88     48     77     33    1041941     449939     585688     744463    582301
   59    32     3661    4606375    11    187684     99    104     67     48    120     94     90     30     660410     738756    3731527     338946    242861
   58    22    11785    3882456    15    131228     66    104    121     96     65     73    114     29    1157452    1086852          5          5         5
   42    26    11620    4747333    12    177261     78     57    113    118    107     84     80     28     325694    1034008     275095     214566    635157
   48    30     7335    4405941    11    216176    119    112     80    127     45     96     53     39     641129      72050     787295     370605    506296
   44    23    10480    4608464    12    148889     93     83     55    102     97    122     39     45     591441     757361          5     371090    203042
   45    30     6681    4455329    12     98200     55     68     72    127     81    125     43     30    1151206     230488     267320     275295    555516
mean_val = mean(cont_array);
std_val= std(cont_array);
median_val = median(cont_array);
min_val = min(cont_array);
max_val = max(cont_array);
range_val = range(cont_array);
[corr_val, p_val] = corr(cont_array,HCV_array(:,29),'type','Spearman');
summary_table1 = sortrows(array2table([mean_val;std_val;median_val;min_val;max_val;range_val;corr_val';p_val']', ...
'VariableNames',{'mean','stddev','median','min','max','range','Spearman_Correlation_Coefficient','P-value'}, ...
'RowNames',HCV.Properties.VariableNames([1,3,11:27])),'Spearman_Correlation_Coefficient','descend','ComparisonMethod','abs')
summary_table1 = 19×8 table
                       mean          stddev        median     min        max        range      Spearman_Correlation_Coefficient    P-value
 1   BMI               28.6087       4.0762        29         22         35         13         -0.0703                              0.0088
 2   RNAEF             2.9138e+05    2.6770e+05    244049     5          810333     810328      0.0517                              0.0543
 3   ALTafter24w       33.4383       7.0736        34         5          45         40          0.0395                              0.1419
 4   RNA12             2.8875e+05    2.8535e+05    234359     5          3731527    3731522     0.0372                              0.1660
 5   RNABase           5.9095e+05    3.5394e+05    593103     11         1201086    1201075     0.0358                              0.1824
 6   RNA4              6.0090e+05    3.6232e+05    597869     5          1201715    1201710    -0.0319                              0.2355
 7   Age               46.3191       8.7815        46         32         61         29         -0.0243                              0.3656
 8   ALT1              83.9162       25.9228       83         39         128        89          0.0236                              0.3806
 9   Plat              1.5835e+05    3.8795e+04    157916     93013      226464     133451     -0.0204                              0.4490
10   AST1              82.7747       25.9932       83         39         128        89         -0.0140                              0.6031
11   ALT48             83.6296       26.2240       83         5          128        123        -0.0112                              0.6777
12   ALT4              83.4058       26.5297       82         39         128        89         -0.0090                              0.7389
13   WBC               7.5334e+03    2.6682e+03    7498       2991       12101      9110        0.0086                              0.7480
14   ALT12             83.5105       26.0645       84         39         128        89          0.0048                              0.8595
15   ALT24             83.7090       26.2060       83         39         128        89         -0.0043                              0.8728
16   RNAEOT            2.8766e+05    2.6456e+05    251376     5          808450     808445     -0.0037                              0.8918
17   ALT36             83.1177       26.3990       84         5          128        123         0.0037                              0.8920
18   RBC               4.4221e+06    3.4636e+05    4438465    3816422    5018451    1202029    -0.0032                              0.9047
19   HGB               12.5877       1.7135        13         10         15         5           0.0002                              0.9946
% Top 8 rank correlated numeric features
head(summary_table1)
ans = 8×8 table
                      mean          stddev        median     min    max        range      Spearman_Correlation_Coefficient    P-value
 1   BMI              28.6087       4.0762        29         22     35         13         -0.0703                              0.0088
 2   RNAEF            2.9138e+05    2.6770e+05    244049     5      810333     810328      0.0517                              0.0543
 3   ALTafter24w      33.4383       7.0736        34         5      45         40          0.0395                              0.1419
 4   RNA12            2.8875e+05    2.8535e+05    234359     5      3731527    3731522     0.0372                              0.1660
 5   RNABase          5.9095e+05    3.5394e+05    593103     11     1201086    1201075     0.0358                              0.1824
 6   RNA4             6.0090e+05    3.6232e+05    597869     5      1201715    1201710    -0.0319                              0.2355
 7   Age              46.3191       8.7815        46         32     61         29         -0.0243                              0.3656
 8   ALT1             83.9162       25.9228       83         39     128        89          0.0236                              0.3806
% Summary Statistics for categorical variables
categorical_dat=HCV(:,[2,4:10]);
cat = HCV_array(:,[2,4:10]);
prevalent_val = mode(cat);
count_val = countcats(categorical(cat),1);
perc_val = count_val/length(cat);
%Chi-square Statistic
chi2=zeros(8,1);
p=zeros(8,1);
for i=1:8
[~,chi2(i),p(i)] = crosstab(categorical_dat.(i),HCV.Baselinehistologicalstaging);
end
summary_table2 = sortrows(array2table([prevalent_val',count_val',perc_val'*100,chi2,p],"RowNames",HCV.Properties.VariableNames([2,4:10]),'VariableNames',{'Mode','Count_Absent','Count_Present','Percent_Absent','Percent_Present','Chi_Square-statistic','P-value'}),'P-value')
summary_table2 = 8×7 table
                                  Mode    Count_Absent    Count_Present    Percent_Absent    Percent_Present    Chi_Square-statistic    P-value
1   NauseaVomting                 1       689             696              49.7473           50.2527            4.9507                  0.0261
2   Epigastricpain                1       687             698              49.6029           50.3971            3.0912                  0.0787
3   Gender                        0       707             678              51.0469           48.9531            2.2702                  0.1319
4   Fever                         1       671             714              48.4477           51.5523            0.5090                  0.4756
5   Fatiguegeneralizedboneache    0       694             691              50.1083           49.8917            0.0061                  0.9380
6   Diarrhea                      1       689             696              49.7473           50.2527            0.0055                  0.9410
7   Headache                      0       698             687              50.3971           49.6029            0.0049                  0.9440
8   Jaundice                      1       691             694              49.8917           50.1083            0.0009                  0.9763

DATA VISUALIZATION

Binary Data
k=1;
m=1;
for i=[2,4:10]
figure(m)
subplot(2,2,k)
h=heatmap(HCV,'Baselinehistologicalstaging',HCV.Properties.VariableNames(i));
h.Title={''};
h.XLabel={'Stage'};
k=k+1;
if k>4
m=m+1;
k=1;
end
end
figure()
h=heatmap(HCV,'BaselinehistologicalGrading','Baselinehistologicalstaging');
Continuous Data
m=4;
for i=[1,3,11:28]
figure(m)
subplot(2,2,k)
hold on
histogram(HCV.(i)(HCV.(29)==0))
histogram(HCV.(i)(HCV.(29)==1))
xlabel(HCV.Properties.VariableNames(i));
legend({'Moderate Stage','Advanced Stage'})
hold off
k=k+1;
if k>4
m=m+1;
k=1;
end
end

Biological Significance

% Observe RNA levels and ALT levels for moderate and advanced fibrosis
moderateFib=HCV(HCV.Baselinehistologicalstaging==0,:);
advancedFib=HCV(HCV.Baselinehistologicalstaging==1,:);
mod=table2array(moderateFib);
adv=table2array(advancedFib);
figure();
hold on
plot([0,4,12,18],mean(mod(:,23:26)),'LineWidth',2,'LineStyle',"--")
ylabel('RNA Levels throughout the treatment')
xlabel('Treatment duration (months)')
plot([0,4,12,18],mean(adv(:,23:26)),'Color','r','LineWidth',2)
legend('Moderate Fibrosis','Advanced Fibrosis')
hold off
%230th individual ideal case
figure();
hold on
plot([1,4,12,24,36,48]/12,mean(mod(:,16:21)),'LineWidth',2,'LineStyle',"--" )
ylabel('ALT Levels throughout the treatment')
xlabel('Initial Treatment duration (months)')
plot([1,4,12,24,36,48]/12,mean(adv(:,16:21)),'Color','r','LineWidth',2)
legend('Moderate Fibrosis','Advanced Fibrosis')
hold off
%445th individual ideal case
We see a drastic difference between the ALT levels of individuals with moderate fibrosis and those with advanced fibrosis, while the RNA levels are very similar. The surprising thing is that for both groups the ALT levels reach their lowest point around the 3rd month and then begin rising again. I believe the ALT levels keep fluctuating and slowly settle to a constant level by the end of treatment, the 18th month.

Min-max normalization of data

X=HCV_array(:,1:28);
colmin = min(X); colmax = max(X);
X = rescale(X, 'InputMin', colmin, 'InputMax', colmax);
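A quick sanity check on the scaling (a sketch): with InputMin/InputMax set to the column extremes, every column of X should now span [0,1].
assert(all(min(X) == 0) && all(max(X) == 1))  % each rescaled column runs from 0 to 1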
Y=HCV_array(:,29);
HCV_new=array2table([X,Y],"VariableNames",HCV.Properties.VariableNames)
HCV_new = 1385×29 table
    Age    Gender    BMI    Fever    NauseaVomting    Headache    Diarrhea    Fatiguegeneralizedboneache    Jaundice    Epigastricpain    WBC    RBC    HGB    Plat    AST1    ALT1    ALT4    ALT12    ALT24    ALT36    ALT48    ALTafter24w    RNABase    RNA4    RNA12    RNAEOT    RNAEF    BaselinehistologicalGrading    Baselinehistologicalstaging
    (every value rescaled to the [0,1] range; the full 1385-row display is omitted)

FEATURE SELECTION

METHOD 0 : T-Tests

for i=1:28
[h0(i),p0(i)]=ttest2(HCV_new.(i)(HCV_new.Baselinehistologicalstaging==0),HCV_new.(i)(HCV_new.Baselinehistologicalstaging==1));
end
sortrows(array2table([h0',p0'],'RowNames',HCV_new.Properties.VariableNames(1:28),'VariableNames',{'Significant/Non-significant','P-value'}),'P-value')
ans = 28×2 table
                                   Significant/Non-significant    P-value
 1   BMI                           1                              0.0092
 2   NauseaVomting                 1                              0.0261
 3   RNAEF                         1                              0.0394
 4   Epigastricpain                0                              0.0788
 5   BaselinehistologicalGrading   0                              0.1271
 6   Gender                        0                              0.1321
 7   ALTafter24w                   0                              0.1648
 8   RNABase                       0                              0.1834
 9   RNA4                          0                              0.2288
10   RNA12                         0                              0.2680
11   Age                           0                              0.3591
12   ALT1                          0                              0.3929
13   Plat                          0                              0.4424
14   Fever                         0                              0.4759
15   AST1                          0                              0.5987
16   ALT48                         0                              0.6603
17   RNAEOT                        0                              0.7020
18   WBC                           0                              0.7548
19   ALT4                          0                              0.7675
20   ALT24                         0                              0.8578
21   ALT12                         0                              0.8641
22   RBC                           0                              0.8968
23   ALT36                         0                              0.9179
24   Fatiguegeneralizedboneache    0                              0.9380
25   Diarrhea                      0                              0.9411
26   Headache                      0                              0.9441
27   Jaundice                      0                              0.9763
28   HGB                           0                              0.9900
P-values of 0.0092, 0.0261 and 0.0394 against a significance level of 0.05 mean that the two groups can be differentiated on these variables, i.e., the variables are significant. Thus, according to the t-test, BMI, NauseaVomting and RNAEF are significant variables.
%Correlated data
HCV_test=array2table([HCV_new.BMI, HCV_new.RNAEF, HCV_new.NauseaVomting, HCV_new.Baselinehistologicalstaging],'VariableNames',{'BMI','RNAEF','NauseaVomting','Stage'});
corrplot(HCV_test)
The low correlation values are reflected in the nearly flat slopes of the least-squares reference lines in the plot.
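To back the visual impression with numbers, the pairwise coefficients can also be computed directly (a small sketch using the same Spearman measure as the earlier summary table; R_sel and P_sel are just illustrative variable names):
[R_sel, P_sel] = corr(table2array(HCV_test), 'type', 'Spearman')  % pairwise correlations and p-values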
% Boxplots of significant variables
figure()
subplot(1,3,1)
boxplot(HCV_test.BMI,HCV_test.Stage)
ylabel('BMI')
xlabel('Cirrhosis Severity')
subplot(1,3,2)
boxplot(HCV_test.NauseaVomting,HCV_test.Stage)
ylabel('Nausea and Vomiting')
xlabel('Cirrhosis Severity')
subplot(1,3,3)
boxplot(HCV_test.RNAEF,HCV_test.Stage)
ylabel('RNAEF levels')
xlabel('Cirrhosis Severity')

METHOD 1

The data has continuous variables (columns [1,3,11:27]) as well as ordinal/categorical variables (columns [2,4:10,28,29]). Referring to https://medium.com/@outside2SDs/an-overview-of-correlation-measures-between-categorical-and-continuous-variables-4c7f85610365,
we use the bias-corrected Cramer's V (derived from the chi-square statistic) to measure association between pairs of categorical variables, logistic regression to understand the association between categorical-continuous variable pairs, and rank-based correlation metrics such as Spearman's to understand the association between continuous variables.
Note : Categorical data does not come from a normal distribution, and since the output is categorical we cannot use linear regression (which assumes normality).
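For reference, a minimal helper sketch of the bias-corrected Cramer's V computation used below (hypothetical function name; it mirrors the inline expression applied later in this section and would sit at the end of the script or in its own file):
function V = biasCorrectedCramersV(x, y)
% Bias-corrected Cramer's V between two categorical vectors (sketch)
[tbl, chi2stat] = crosstab(x, y);                % contingency table and chi-square statistic
n = sum(tbl(:));
[r, c] = size(tbl);
phi2 = max(0, chi2stat/n - (r-1)*(c-1)/(n-1));   % bias-corrected phi^2
V = sqrt(phi2 / min(c - (c-1)^2/(n-1), r - (r-1)^2/(n-1)));   % same denominator as used below
end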

Association between categorical-categorical variables

categorical_dat=HCV_new(:,[2,4:10,28]);
Categorical variables with 2 levels
chi2=zeros(8,1);
p1=zeros(8,1);
for i=1:8
[~,chi2(i),p1(i)] = crosstab(categorical_dat.(i),HCV_new.Baselinehistologicalstaging);
end
The chi-square statistic is sensitive to sample size, so the bias-corrected Cramer's V is preferred.
For a 2x2 contingency table, the bias-corrected Cramer's V equals the phi coefficient.
bias_cramerV=sqrt(chi2/height(HCV));
Categorical variable with more than 2 levels
BaselinehistologicalGrading is a categorical variable whose strength of association also needs to be tested. For its 14x2 contingency table,
[tbl1,chi2(9,1),p1(9,1)] = crosstab(HCV_new.BaselinehistologicalGrading,HCV_new.Baselinehistologicalstaging);
%For bias corrected cramer's V
tbl1_size=size(tbl1);
bias_cramerV(9,1)=sqrt(max([0,(chi2(9,1)/1385)-((tbl1_size(1)-1)*(tbl1_size(2)-1))/(height(HCV)-1)])/min([tbl1_size(2)-((tbl1_size(2)-1)^2)/(height(HCV)-1),tbl1_size(1)-((tbl1_size(1)-1)^2)/(height(HCV)-1)]));
Association of categorical variables with Stages is as follows -
categorical_summary=sortrows(array2table([chi2,p1,bias_cramerV],"RowNames",categorical_dat.Properties.VariableNames,'VariableNames',{'Chi-Square Statistic','P-value','CramersV_Strength'}),'P-value','ascend')
categorical_summary = 9×3 table
                                  Chi-Square Statistic    P-value    CramersV_Strength
1   NauseaVomting                 4.9507                  0.0261     0.0598
2   Epigastricpain                3.0912                  0.0787     0.0472
3   Gender                        2.2702                  0.1319     0.0405
4   Fever                         0.5090                  0.4756     0.0192
5   BaselinehistologicalGrading   9.1334                  0.7628     0
6   Fatiguegeneralizedboneache    0.0061                  0.9380     0.0021
7   Diarrhea                      0.0055                  0.9410     0.0020
8   Headache                      0.0049                  0.9440     0.0019
9   Jaundice                      0.0009                  0.9763     0.0008
Observing the above table, by strength of association (Cramer's V coefficient) and p-value at a 5% significance level, only NauseaVomting appears to be a significant variable.

Association between categorical-continuous variables

continuous_var=HCV_new(:,[1,3,11:27]);
[B,dev,stats]=mnrfit(X(:,[1,3,11:27]),categorical(Y))
B = 20×1
-0.1123 0.1901 0.4397 -0.1000 0.0302 -0.0278 0.1407 0.1238 -0.1801 0.0364
dev = 1.8968e+03
stats = struct with fields:
beta: [20×1 double] dfe: 1365 sfit: 1.0073 s: 1 estdisp: 0 covb: [20×20 double] coeffcorr: [20×20 double] se: [20×1 double] t: [20×1 double] p: [20×1 double] resid: [1385×2 double] residp: [1385×2 double] residd: [1385×1 double]
ans = 20×1
0.8131 0.2905 0.0118 0.5908 0.8733 0.8613 0.4535 0.5060 0.3375 0.8423
sortrows(array2table([B(2:end),stats.p(2:end)],"RowNames",continuous_var.Properties.VariableNames,'VariableNames',{'B-value','P-value'}),'P-value','ascend')
ans = 19×2 table
                    B-value    P-value
 1   BMI             0.4397    0.0118
 2   RNAEF          -0.3883    0.0445
 3   RNAEOT          0.3169    0.1026
 4   ALTafter24w    -0.4199    0.1732
 5   RNABase        -0.2310    0.2119
 6   RNA4            0.1932    0.2856
 7   Age             0.1901    0.2905
 8   ALT1           -0.1801    0.3375
 9   RNA12          -0.6656    0.4157
10   Plat            0.1407    0.4535
11   AST1            0.1238    0.5060
12   ALT48           0.1631    0.5254
13   WBC            -0.1000    0.5908
14   ALT24           0.0452    0.8075
15   ALT4            0.0364    0.8423
16   HGB            -0.0278    0.8613
17   RBC             0.0302    0.8733
18   ALT12          -0.0280    0.8807
19   ALT36          -0.0066    0.9791
Here, at the 5% significance level, BMI followed by RNAEF are the most significant.

Thus, NauseaVomting, BMI and RNAEF are significant variables, similar to Method 0.

METHOD 2

Embedded-type feature selection lets a single method handle both types of features at once. For this particular function, when the input is a table it assumes the last column is the response variable and treats all categorical or logical variables as categorical.
https://www.mathworks.com/help/stats/feature-selection.html

Embedded Type Feature Selection

stepwiseglm(HCV)
1. Adding BMI, Deviance = 344.1219, FStat = 6.810953, PValue = 0.009157848
2. Adding NauseaVomting, Deviance = 342.8689, FStat = 5.050527, PValue = 0.02477567
ans =
Generalized linear regression model:
    Baselinehistologicalstaging ~ 1 + BMI + NauseaVomting
    Distribution = Normal

Estimated Coefficients:
                       Estimate       SE           tStat      pValue
    (Intercept)         0.73428       0.09579      7.6655     3.3388e-14
    BMI              -0.0086275     0.0032847     -2.6266      0.0087198
    NauseaVomting      0.060158      0.026769      2.2473       0.024776

1385 observations, 1382 error degrees of freedom
Estimated Dispersion: 0.248
F-statistic vs. constant model: 5.94, p-value = 0.0027
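If one prefers not to rely on the last-column default, the response and distribution can be named explicitly (a sketch, assuming the standard stepwiseglm name-value options; mdl_sw is just an illustrative name):
mdl_sw = stepwiseglm(HCV, 'constant', ...
    'ResponseVar', 'Baselinehistologicalstaging', ...
    'Distribution', 'binomial', ...   % logistic fit for the binary staging response
    'Upper', 'linear');               % allow main effects only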

Thus, only BMI and NauseaVomting are significant variables.

METHOD 3

mdls=fitglm(HCV_new,'link','logit','Distribution',"binomial")
mdls =
Generalized linear regression model:
    logit(Baselinehistologicalstaging) ~ 1 + Age + Gender + BMI + Fever + NauseaVomting + Headache + Diarrhea + Fatiguegeneralizedboneache + Jaundice + Epigastricpain + WBC + RBC + HGB + Plat + AST1 + ALT1 + ALT4 + ALT12 + ALT24 + ALT36 + ALT48 + ALTafter24w + RNABase + RNA4 + RNA12 + RNAEOT + RNAEF + BaselinehistologicalGrading
    Distribution = Binomial

Estimated Coefficients:
                                     Estimate        SE          tStat        pValue
    (Intercept)                       0.35215      0.52294       0.6734       0.50069
    Age                               -0.1998      0.18133      -1.1019       0.27052
    Gender                            0.18327      0.10978       1.6693      0.095049
    BMI                              -0.45906      0.17625      -2.6045     0.0092001
    Fever                           -0.096796      0.10965     -0.88278       0.37736
    NauseaVomting                     0.22459      0.11049       2.0327      0.042082
    Headache                        -0.012455      0.10975     -0.11349       0.90965
    Diarrhea                       -0.0043929      0.11035    -0.039808       0.96825
    Fatiguegeneralizedboneache      -0.023867      0.10999     -0.21699       0.82822
    Jaundice                        -0.039412      0.10975      -0.3591       0.71952
    Epigastricpain                   -0.21225      0.11068      -1.9177      0.055149
    WBC                               0.12247      0.18791      0.65173       0.51457
    RBC                             -0.061554      0.19097     -0.32232       0.74721
    HGB                              0.025732      0.16049      0.16033       0.87262
    Plat                             -0.19159      0.19013      -1.0077       0.31361
    AST1                             -0.11961      0.18768     -0.63729       0.52394
    ALT1                              0.16652       0.1899      0.87688       0.38055
    ALT4                            -0.062659      0.18517     -0.33839       0.73507
    ALT12                            0.035184      0.18787      0.18728       0.85144
    ALT24                           -0.067583      0.18686     -0.36167        0.7176
    ALT36                            0.015476      0.25555      0.06056       0.95171
    ALT48                            -0.12008      0.25877     -0.46406       0.64261
    ALTafter24w                       0.43729       0.3107       1.4074        0.1593
    RNABase                            0.2293      0.18612        1.232       0.21796
    RNA4                             -0.19377      0.18267      -1.0608       0.28879
    RNA12                             0.72857       0.8232      0.88505       0.37613
    RNAEOT                           -0.34134      0.19572      -1.7441      0.081148
    RNAEF                             0.42108      0.19525       2.1567      0.031031
    BaselinehistologicalGrading     -0.023228     0.013727      -1.6921      0.090621

1385 observations, 1356 error degrees of freedom
Dispersion: 1
Chi^2-statistic vs. constant model: 36.4, p-value = 0.134
Generally, we start by deleting the variable with the highest p-value.
new=HCV_new;
while mdls.devianceTest.pValue(2)>0.05
[b3,i3]=maxk(mdls.Coefficients.pValue,1);
sprintf('Variable removed :%s',string(new.Properties.VariableNames(i3-1)))
new.(i3-1)=[];
mdls=fitglm(new,'link','logit','Distribution',"binomial");
end
ans = 'Variable removed :Diarrhea'
ans = 'Variable removed :ALT36'
ans = 'Variable removed :Headache'
ans = 'Variable removed :HGB'
ans = 'Variable removed :ALT12'
mdls.devianceTest.pValue(2)
ans = 0.0387
fitglm(new)
ans =
Generalized linear regression model:
    Baselinehistologicalstaging ~ 1 + Age + Gender + BMI + Fever + NauseaVomting + Fatiguegeneralizedboneache + Jaundice + Epigastricpain + WBC + RBC + Plat + AST1 + ALT1 + ALT4 + ALT24 + ALT48 + ALTafter24w + RNABase + RNA4 + RNA12 + RNAEOT + RNAEF + BaselinehistologicalGrading
    Distribution = Normal

Estimated Coefficients:
                                     Estimate        SE          tStat       pValue
    (Intercept)                       0.59542      0.11498       5.1785     2.5724e-07
    Age                             -0.048631     0.044394      -1.0955        0.27351
    Gender                           0.044787     0.026915        1.664       0.096339
    BMI                              -0.11186     0.042944      -2.6048      0.0092937
    Fever                           -0.023849     0.026871     -0.88752        0.37495
    NauseaVomting                    0.054664     0.027097       2.0174       0.043852
    Fatiguegeneralizedboneache     -0.0054942     0.026967     -0.20374        0.83859
    Jaundice                       -0.0097486     0.026913     -0.36223        0.71724
    Epigastricpain                  -0.051626     0.027098      -1.9052       0.056971
    WBC                              0.029594     0.045991      0.64348        0.52002
    RBC                             -0.014393     0.046741     -0.30794        0.75817
    Plat                            -0.047036     0.046584      -1.0097        0.31282
    AST1                            -0.029058     0.045984     -0.63191        0.52755
    ALT1                             0.039508     0.046374      0.85193         0.3944
    ALT4                            -0.015337      0.04533     -0.33834        0.73516
    ALT24                           -0.016489     0.045864     -0.35952        0.71926
    ALT48                           -0.029294     0.063396     -0.46208         0.6441
    ALTafter24w                       0.10533     0.076048        1.385        0.16628
    RNABase                          0.054924     0.045563       1.2055        0.22823
    RNA4                            -0.047105     0.044757      -1.0525        0.29278
    RNA12                             0.17708      0.20204      0.87649        0.38092
    RNAEOT                          -0.083026     0.047905      -1.7331       0.083296
    RNAEF                             0.10219     0.047674       2.1435       0.032249
    BaselinehistologicalGrading   -0.0056369      0.00336      -1.6776       0.093648

1385 observations, 1361 error degrees of freedom
Estimated Dispersion: 0.248
F-statistic vs. constant model: 1.57, p-value = 0.0421
Here we realise that all the other methods look at every feature individually, whereas this one looks at them as a whole, builds a linear model from all of them, and then determines the p-value of the model.

METHOD 4

The Kruskal-Wallis test has been used for continuous variables with a non-normal distribution. The chi-square test has been used for categorical variables. Pearson correlation coefficients between fibrosis stage and each variable have also been assessed. https://ieeexplore-ieee-org.proxy.lib.umich.edu/document/7891989
for i=[1,3,11:28]
p41(i)=kruskalwallis(HCV_new.(i),HCV_new.Baselinehistologicalstaging,"off");
end
for i=[2,4:10]
[~,~,p41(i)] = crosstab(HCV_new.(i),HCV_new.Baselinehistologicalstaging);
end
[r1,p42]=corr(X,Y,'type',"Pearson")
r1 = 28×1
-0.0247 0.0405 -0.0700 -0.0192 0.0598 -0.0019 0.0020 -0.0021 -0.0008 -0.0472
p42 = 28×1
0.3591 0.1321 0.0092 0.4759 0.0261 0.9441 0.9411 0.9380 0.9763 0.0788
sortrows(array2table([p41',r1,p42],'RowNames',HCV.Properties.VariableNames(1:28),'VariableNames',{'P-value','Pearsons_Coeff','Pearsons_P-value'}),'P-value')
ans = 28×3 table
                                   P-value    Pearsons_Coeff    Pearsons_P-value
 1   BMI                           0.0089     -0.0700           0.0092
 2   NauseaVomting                 0.0261      0.0598           0.0261
 3   RNAEF                         0.0543      0.0553           0.0394
 4   Epigastricpain                0.0787     -0.0472           0.0788
 5   Gender                        0.1319      0.0405           0.1321
 6   BaselinehistologicalGrading   0.1379     -0.0410           0.1271
 7   ALTafter24w                   0.1419      0.0373           0.1648
 8   RNA12                         0.1659      0.0298           0.2680
 9   RNABase                       0.1823      0.0358           0.1834
10   RNA4                          0.2354     -0.0324           0.2288
11   Age                           0.3654     -0.0247           0.3591
12   ALT1                          0.3804      0.0230           0.3929
13   Plat                          0.4488     -0.0207           0.4424
14   Fever                         0.4756     -0.0192           0.4759
15   AST1                          0.6030     -0.0142           0.5987
16   ALT48                         0.6776     -0.0118           0.6603
17   ALT4                          0.7387     -0.0080           0.7675
18   WBC                           0.7479      0.0084           0.7548
19   ALT12                         0.8594      0.0046           0.8641
20   ALT24                         0.8728     -0.0048           0.8578
21   RNAEOT                        0.8918     -0.0103           0.7020
22   ALT36                         0.8920      0.0028           0.9179
23   RBC                           0.9047     -0.0035           0.8968
24   Fatiguegeneralizedboneache    0.9380     -0.0021           0.9380
25   Diarrhea                      0.9410      0.0020           0.9411
26   Headache                      0.9440     -0.0019           0.9441
27   Jaundice                      0.9763     -0.0008           0.9763
28   HGB                           0.9946     -0.0003           0.9900
As the p-value increases, the strength of correlation given by the Pearson coefficient decreases. At a significance level of 0.05, BMI, NauseaVomting and RNAEF are significant again, just like Method 1, while BMI and NauseaVomting are significant as in Method 2.

METHOD 5

Unsupervised Learning - PCA
Xpca=zscore((HCV_array-min(HCV_array))./(max(HCV_array)-min(HCV_array)));
ExpectedOutput=table2array(HCV_new(:,29));
[coeff,score,latent,~,explained]=pca(Xpca)
coeff = 29×29
(29×29 matrix of principal-component loadings; the flattened numeric display is omitted)
score = 1385×29
(1385×29 matrix of principal-component scores; the flattened numeric display is omitted)
latent = 29×1 (first 10 values shown)
1.8974 1.2473 1.2360 1.1929 1.1630 1.1430 1.1291 1.1069 1.0609 1.0553
explained = 29×1 (first 10 values shown)
6.5429 4.3012 4.2622 4.1136 4.0104 3.9413 3.8934 3.8169 3.6582 3.6390
b=bar(explained(1:15))
b =
Bar with properties:
      BarLayout: 'grouped'
       BarWidth: 0.8000
      FaceColor: [0 0.4470 0.7410]
      EdgeColor: [0 0 0]
      BaseValue: 0
          XData: [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
          YData: [6.5429 4.3012 4.2622 4.1136 4.0104 3.9413 3.8934 3.8169 3.6582 3.6390 3.5928 3.5524 3.5298 3.4826 3.4101]
xtips = b.XEndPoints;
ytips = b.YEndPoints;
labels = string(round(b.YData,2));
text(xtips,ytips,labels,'HorizontalAlignment','center',...
'VerticalAlignment','bottom')
xlabel('Principal Component')
ylabel('Percentage of explained variance')
title('Principal Component Analysis')
figure
gscatter(score(1:100,1), score(1:100,2), ExpectedOutput(1:100,1))
xlabel('Principal Component 1')
ylabel('Principal Component 2')
title('PCA of HCV data')
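Before moving on, the number of components worth keeping can also be judged from the cumulative explained variance; a minimal sketch using the explained vector computed above (the 80% cutoff is only an illustrative assumption, not part of the original analysis):
% Cumulative percentage of variance explained by the first k components
cumVar = cumsum(explained);
nKeep = find(cumVar >= 80, 1);   % illustrative 80% cutoff
fprintf('First %d components explain %.1f%% of the variance\n', nKeep, cumVar(nKeep));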
sortrows(array2table([coeff(:,1),coeff(:,2),coeff(:,3),coeff(:,4)],'RowNames',HCV_new.Properties.VariableNames(1:29),'VariableNames',{'PC1','PC2','PC3','PC4'}),'PC1','descend')
ans = 29×4 table
                                        PC1        PC2        PC3        PC4
 1   RNAEF                            0.5708    -0.0114     0.0411    -0.0047
 2   RNAEOT                           0.5628    -0.0439     0.0724     0.0392
 3   RNA12                            0.5518    -0.0101     0.0310     0.0457
 4   Epigastricpain                   0.0905    -0.3127    -0.0890     0.1407
 5   Jaundice                         0.0602    -0.0545    -0.1540    -0.2501
 6   ALT36                            0.0552     0.1020    -0.0894    -0.1370
 7   Baselinehistologicalstaging      0.0522     0.2022    -0.1248    -0.2079
 8   Plat                             0.0505     0.3816     0.0122     0.2792
 9   ALT4                             0.0408     0.0428     0.0827    -0.2766
10   Fatiguegeneralizedboneache       0.0389    -0.1280    -0.0237     0.0409
11   Diarrhea                         0.0381     0.1366    -0.3065     0.0907
12   ALT48                            0.0259    -0.2446    -0.3236     0.0773
13   ALTafter24w                      0.0203    -0.1381    -0.1882     0.0130
14   NauseaVomting                    0.0196     0.5239    -0.0087     0.0392
15   Fever                            0.0187    -0.0003    -0.1485     0.1059
16   ALT24                            0.0185     0.2015     0.3262    -0.2542
17   RNABase                          0.0147     0.0051    -0.3082    -0.0953
18   Headache                         0.0071     0.1349    -0.1997    -0.2040
19   Gender                          -0.0069    -0.0093    -0.0289     0.0738
20   HGB                             -0.0125     0.0099     0.4005     0.1555
21   ALT1                            -0.0189     0.2657    -0.3702     0.2814
22   AST1                            -0.0287     0.0170    -0.0467     0.0505
23   Age                             -0.0445    -0.0331    -0.1547    -0.1982
24   ALT12                           -0.0472    -0.1595     0.1466    -0.3914
25   BaselinehistologicalGrading     -0.0524    -0.1264    -0.0108     0.1052
26   RNA4                            -0.0575    -0.1885     0.0403     0.1741
27   BMI                             -0.0614    -0.0124     0.2295     0.4466
28   RBC                             -0.0626     0.2296     0.1561    -0.0386
29   WBC                             -0.0800    -0.2216     0.0844     0.0851
%For threshold 0.1050
PC12=mean(abs(coeff(:,1:2)),2);
sortrows(table(PC12,'Rownames',HCV_new.Properties.VariableNames),'PC12','descend')
ans = 29×1 table
                                      PC12
 1   RNAEOT                          0.3033
 2   RNAEF                           0.2911
 3   RNA12                           0.2809
 4   NauseaVomting                   0.2717
 5   Plat                            0.2161
 6   Epigastricpain                  0.2016
 7   WBC                             0.1508
 8   RBC                             0.1461
 9   ALT1                            0.1423
10   ALT48                           0.1352
11   Baselinehistologicalstaging     0.1272
12   RNA4                            0.1230
13   ALT24                           0.1100
14   ALT12                           0.1034
15   BaselinehistologicalGrading     0.0894
16   Diarrhea                        0.0874
17   Fatiguegeneralizedboneache      0.0835
18   ALTafter24w                     0.0792
19   ALT36                           0.0786
20   Headache                        0.0710
21   Jaundice                        0.0574
22   ALT4                            0.0418
23   Age                             0.0388
24   BMI                             0.0369
25   AST1                            0.0228
26   HGB                             0.0112
27   RNABase                         0.0099
28   Fever                           0.0095
29   Gender                          0.0081
PC14=mean(abs(coeff(:,1:4)),2);
sortrows(table(PC14,'Rownames',HCV_new.Properties.VariableNames),'PC14','descend')
ans = 8×1 table
                PC14
 1   ALT1      0.2340
 2   ALT24     0.2001
 3   BMI       0.1875
 4   ALT12     0.1862
 5   Plat      0.1809
 6   RNAEOT    0.1796
 7   ALT48     0.1679
 8   RNA12     0.1596
%Variables removed by thresholding PC1-PC2 interaction
X4=HCV_new;
del1=X4.Properties.VariableNames(find(PC12<0.1050)')
del1 = 1×16 cell array
'Age' 'Gender' 'BMI' 'Fever' 'Headache' 'Diarrhea' 'Fatiguegeneralizedboneache' 'Jaundice' 'HGB' 'AST1' 'ALT4' 'ALT12' 'ALT36' 'ALTafter24w' 'RNABase' 'BaselinehistologicalGrading'
X4=removevars(X4,del1)
X4 = 1385×13 table
     Variables: NauseaVomting, Epigastricpain, WBC, RBC, Plat, ALT1, ALT24, ALT48, RNA4, RNA12, RNAEOT, RNAEF, Baselinehistologicalstaging
     (first 100 rows of rescaled values displayed in the live script; numeric dump omitted here)
     ⋮
X4=table2array(X4);
%Variables removed by thresholding PC1-PC4 interaction
X5=HCV_new;
del2=X5.Properties.VariableNames(find(PC14<0.1050)')
del2 = 1×7 cell array
'Gender' 'Fever' 'Fatiguegeneralizedboneache' 'AST1' 'ALT36' 'ALTafter24w' 'BaselinehistologicalGrading'
X5=removevars(X5,del2)
X5 = 1385×22 table
     Variables: Age, BMI, NauseaVomting, Headache, Diarrhea, Jaundice, Epigastricpain, WBC, RBC, HGB, Plat, ALT1, ALT4, ALT12, ALT24, ALT48, RNABase, RNA4, RNA12, RNAEOT, RNAEF, Baselinehistologicalstaging
     (first 100 rows of rescaled values displayed in the live script; numeric dump omitted here)
     ⋮
X5=table2array(X5);
Thus, the performance of models trained on the distinct feature sets selected by methods 1, 2, 3, and 5 will be compared.

SUPERVISED LEARNING

TRAINING SUPERVISED LEARNING MODEL (Categorical Variables can be specified)

The problem here is a classification task, so we use logistic regression rather than linear regression.
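Logistic regression models the probability of the positive class through the logit link, so its predictions lie between 0 and 1 and can be thresholded at 0.5, which is how the predicted classes are obtained throughout this section. A minimal illustrative sketch (eta stands for a generic linear predictor and is not a variable from this analysis):
% Sigmoid transform of a linear predictor eta = b0 + b1*x1 + ...
eta = linspace(-3, 3, 7);       % placeholder values for the linear predictor
p = 1./(1 + exp(-eta));         % predicted probability of the positive class
yhat = double(p >= 0.5);        % same 0.5 threshold used for the fitglm predictions below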

Using Method 1 of feature selection

%Method 1
X1=[HCV_new.BMI, HCV_new.NauseaVomting, HCV_new.RNAEF];
[Xtrain1, Ytrain1, Xtest1, Ytest1]=trainTestSplit(X1,Y,0.7);
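trainTestSplit is a project helper whose definition is not included in this section; a minimal sketch consistent with how it is called here (a random split with the given training fraction) could look as follows, though the actual implementation may differ:
function [Xtrain, Ytrain, Xtest, Ytest] = trainTestSplit(X, Y, trainFrac)
% Randomly split the rows of X and Y into a training and a test set.
n = size(X, 1);
idx = randperm(n);                  % shuffled row indices
nTrain = round(trainFrac * n);      % e.g. trainFrac = 0.7 keeps 70% of rows for training
Xtrain = X(idx(1:nTrain), :);
Ytrain = Y(idx(1:nTrain), :);
Xtest  = X(idx(nTrain+1:end), :);
Ytest  = Y(idx(nTrain+1:end), :);
end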

Why new classification models?

There is a point in implementing new methods only if they improve the classification results.
%Logistic Regression
tic
Logmodel=fitglm(Xtrain1,categorical(Ytrain1),'link','logit','Distribution',"binomial",'CategoricalVars',{'x2'});
pred1=predict(Logmodel,Xtest1);pred1(pred1<0.5)=0;pred1(pred1>=0.5)=1;
[accuracy1,precision1,recall1]=meas(categorical(pred1),Ytest1);
toc
Elapsed time is 0.076980 seconds.
%Random Forest (Classification)
tic
RFmodel=TreeBagger(100,Xtrain1,categorical(Ytrain1),'MinLeafSize',10,'OOBPrediction',"on",'OOBPredictorImportance',"on",'CategoricalPredictors',[2]);
pred2=categorical(predict(RFmodel,Xtest1));
[accuracy2,precision2,recall2]=meas(pred2,Ytest1);
toc
Elapsed time is 2.735312 seconds.
%Decision Tree (Classification)
tic
DecisionTree=fitctree(Xtrain1,categorical(Ytrain1),"CategoricalPredictors",[2]);
pred3=predict(DecisionTree,Xtest1);
[accuracy3,precision3,recall3]=meas(pred3,Ytest1);
toc
Elapsed time is 0.062668 seconds.
%Naive Bayes Method
tic
NBmodel=fitcnb(Xtrain1,categorical(Ytrain1),'PredictorNames',{'BMI','NauseaVomiting','RNAEF'},"CategoricalPredictors",'NauseaVomiting');
pred4=predict(NBmodel,Xtest1);
[accuracy4,precision4,recall4]=meas(pred4,Ytest1);
toc
Elapsed time is 0.044085 seconds.
%Support vector machine (SVM) classifier for one-class and binary classification
tic
CSVM=fitcsvm(Xtrain1,categorical(Ytrain1),"CategoricalPredictors",[2]);
pred3=predict(CSVM,Xtest1);
[accuracy5,precision5,recall5]=meas(pred3,Ytest1);
toc
Elapsed time is 0.108909 seconds.
We can conclude that Naive Bayes achieves metrics similar to those of logistic regression but takes the least execution time among the classifiers above, so I decided to include the Naive Bayes classifier as well.
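Because tic/toc measures a single run and includes one-off costs, a more stable timing comparison can be obtained with timeit, which evaluates the function handle several times and reports a robust estimate; a minimal sketch on the Method 1 training split (illustrative, not part of the original run):
% Robust timing of the two fastest classifiers on the Method 1 features
tNB  = timeit(@() fitcnb(Xtrain1, categorical(Ytrain1), 'CategoricalPredictors', [2]));
tGLM = timeit(@() fitglm(Xtrain1, categorical(Ytrain1), 'link', 'logit', 'Distribution', 'binomial'));
fprintf('Naive Bayes: %.3f s, Logistic regression: %.3f s\n', tNB, tGLM);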

5 fold cross validation

fold=5;
index1=crossvalind('kfold',size(Xtrain1,1),fold);
accuracy1=zeros(1,fold);
accuracy2=zeros(1,fold);
accuracy3=zeros(1,fold);
accuracy4=zeros(1,fold);
precision1=zeros(1,fold);
precision2=zeros(1,fold);
precision3=zeros(1,fold);
precision4=zeros(1,fold);
err=zeros(1,fold);
recall1=zeros(1,fold);
recall2=zeros(1,fold);
recall3=zeros(1,fold);
recall4=zeros(1,fold);
accuracy5=zeros(1,fold);
precision5=zeros(1,fold);
recall5=zeros(1,fold);
for i=1:fold
test=(index1==i);
train=~test;
Xtrain1=X1(train,:);
Xtest1=X1(test,:);
Ytrain1=Y(train,:);
Ytest1=Y(test,:);
%Logistic regression
Logmodel=fitglm(Xtrain1,categorical(Ytrain1),'link','logit','Distribution',"binomial",'CategoricalVars',{'x2'});
pred1=predict(Logmodel,Xtest1);pred1(pred1<0.5)=0;pred1(pred1>=0.5)=1;
[accuracy1(1,i),precision1(1,i),recall1(1,i)]=meas(categorical(pred1),Ytest1);
%Random Forest (Classification)
RFmodel=TreeBagger(100,Xtrain1,categorical(Ytrain1),'MinLeafSize',10,'OOBPrediction',"on",'OOBPredictorImportance',"on",'CategoricalPredictors',[2]);
pred2=categorical(predict(RFmodel,Xtest1)); %pred2=double(categorical(pred2));
[accuracy2(1,i),precision2(1,i),recall2(1,i)]=meas(pred2,Ytest1);
%Decision Tree (Classification)
DecisionTree=fitctree(Xtrain1,categorical(Ytrain1),'PredictorNames',{'BMI','NauseaVomiting','RNAEF'},"CategoricalPredictors",'NauseaVomiting');
pred3=predict(DecisionTree,Xtest1);
[accuracy3(1,i),precision3(1,i),recall3(1,i)]=meas(pred3,Ytest1);
err(1,i)=cvloss(DecisionTree);
%Naive Bayes Method
NBmodel=fitcnb(Xtrain1,categorical(Ytrain1),"CategoricalPredictors",[2]);
pred4=predict(NBmodel,Xtest1);
[accuracy4(1,i),precision4(1,i),recall4(1,i)]=meas(pred4,Ytest1);
%Multinomial Logistic Regression - Categorical Variables cannot be
%specified
[b5,dev5,stats5]=mnrfit(Xtrain1,categorical(Ytrain1));
[~,pred5]=max(mnrval(b5,Xtest1)');pred5=(pred5-1)';
[accuracy5(1,i),precision5(1,i),recall5(1,i)]=meas(categorical(pred5),Ytest1);
end
avg_cvloss1=mean(err);
method1=[mean(accuracy1),mean(precision1),mean(recall1);...
mean(accuracy2),mean(precision2),mean(recall2);...
mean(accuracy3),mean(precision3),mean(recall3);...
mean(accuracy4),mean(precision4),mean(recall4);...
mean(accuracy5),mean(precision5),mean(recall5)]
method1 = 5×3
0.5211 0.4832 0.3336
0.4943 0.5272 0.4205
0.4665 0.5521 0.4989
0.5273 0.4744 0.3404
0.5211 0.4832 0.3336

Using Method 2 of feature selection

%Method 2
X2=X1(:,1:2);
[Xtrain2, Ytrain2, Xtest2, Ytest2]=trainTestSplit(X2, Y,0.7);

5 fold cross validation

fold=5;
index1=crossvalind('kfold',size(Xtrain2,1),fold);
accuracy1=zeros(1,fold);
accuracy2=zeros(1,fold);
accuracy3=zeros(1,fold);
accuracy4=zeros(1,fold);
precision1=zeros(1,fold);
precision2=zeros(1,fold);
precision3=zeros(1,fold);
precision4=zeros(1,fold);
err=zeros(1,fold);
recall1=zeros(1,fold);
recall2=zeros(1,fold);
recall3=zeros(1,fold);
recall4=zeros(1,fold);
accuracy5=zeros(1,fold);
precision5=zeros(1,fold);
recall5=zeros(1,fold);
for i=1:fold
test=(index1==i);
train=~test;
Xtrain2=X2(train,:);
Xtest2=X2(test,:);
Ytrain2=Y(train,:);
Ytest2=Y(test,:);
%Logistic Regression
Logmodel=fitglm(Xtrain2,categorical(Ytrain2),'link','logit','Distribution',"binomial",'CategoricalVars',{'x2'});
pred1=predict(Logmodel,Xtest2);pred1(pred1<0.5)=0;pred1(pred1>=0.5)=1;
[accuracy1(1,i),precision1(1,i),recall1(1,i)]=meas(categorical(pred1),Ytest2);
%Random Forest (Classification)
RFmodel=TreeBagger(100,Xtrain2,categorical(Ytrain2),'MinLeafSize',10,'OOBPrediction',"on",'OOBPredictorImportance',"on",'CategoricalPredictors',[2]);
pred2=categorical(predict(RFmodel,Xtest2));
[accuracy2(1,i),precision2(1,i),recall2(1,i)]=meas(pred2,Ytest2);
%Decision Tree (Classification)
DecisionTree=fitctree(Xtrain2,categorical(Ytrain2),'PredictorNames',{'BMI','NauseaVomiting'},"CategoricalPredictors",'NauseaVomiting');
pred3=predict(DecisionTree,Xtest2);
[accuracy3(1,i),precision3(1,i),recall3(1,i)]=meas(pred3,Ytest2);
err(1,i)=cvloss(DecisionTree);
%Naive Bayes Method
NBmodel=fitcnb(Xtrain2,categorical(Ytrain2),"CategoricalPredictors",[2]);
pred4=predict(NBmodel,Xtest2);
[accuracy4(1,i),precision4(1,i),recall4(1,i)]=meas(pred4,Ytest2);
%Multinomial Logistic Regression - Categorical Variables cannot be
%specified
[b5,dev5,stats5]=mnrfit(Xtrain2,categorical(Ytrain2));
[~,pred5]=max(mnrval(b5,Xtest2)');pred5=(pred5-1)';
[accuracy5(1,i),precision5(1,i),recall5(1,i)]=meas(categorical(pred5),Ytest2);
end
avg_cvloss2=mean(err);
method2=[mean(accuracy1),mean(precision1),mean(recall1);...
mean(accuracy2),mean(precision2),mean(recall2);...
mean(accuracy3),mean(precision3),mean(recall3);...
mean(accuracy4),mean(precision4),mean(recall4);...
mean(accuracy5),mean(precision5),mean(recall5)]
method2 = 5×3
0.5108 0.4970 0.3712
0.4923 0.5249 0.4614
0.5026 0.5123 0.4430
0.5119 0.4928 0.3566
0.5108 0.4970 0.3712

Using Method 3 of feature selection

%Method 3
X3=table2array(HCV_new(:,1:end-1));
colmin = min(X3); colmax = max(X3);
X3 = rescale(X3, 'InputMin', colmin, 'InputMax', colmax);
[Xtrain3, Ytrain3, Xtest3, Ytest3]=trainTestSplit(X3, Y,0.7);

5 fold cross validation

fold=5;
index1=crossvalind('kfold',size(Xtrain3,1),fold);
accuracy1=zeros(1,fold);
accuracy2=zeros(1,fold);
accuracy3=zeros(1,fold);
accuracy4=zeros(1,fold);
precision1=zeros(1,fold);
precision2=zeros(1,fold);
precision3=zeros(1,fold);
precision4=zeros(1,fold);
err=zeros(1,fold);
recall1=zeros(1,fold);
recall2=zeros(1,fold);
recall3=zeros(1,fold);
recall4=zeros(1,fold);
accuracy5=zeros(1,fold);
precision5=zeros(1,fold);
recall5=zeros(1,fold);
%Model 1
for i=1:fold
test=(index1==i);
train=~test;
Xtrain3=X3(train,:);
Xtest3=X3(test,:);
Ytrain3=Y(train,:);
Ytest3=Y(test,:);
%Logistic Regression
Logmodel=fitglm(Xtrain3,categorical(Ytrain3),'link','logit','Distribution',"binomial",'CategoricalVars',{'x2','x4','x5','x6','x7','x8'});
pred1=predict(Logmodel,Xtest3);pred1(pred1<0.5)=0;pred1(pred1>=0.5)=1;
[accuracy1(1,i),precision1(1,i),recall1(1,i)]=meas(categorical(pred1),Ytest3);
%Random Forest (Classification)
RFmodel=TreeBagger(100,Xtrain3,categorical(Ytrain3),'MinLeafSize',10,'OOBPrediction',"on",'OOBPredictorImportance',"on",'CategoricalPredictors',[2,4,5,6,7,8]);
pred2=categorical(predict(RFmodel,Xtest3));
[accuracy2(1,i),precision2(1,i),recall2(1,i)]=meas(pred2,Ytest3);
%Decision Tree (Classification)
DecisionTree=fitctree(Xtrain3,categorical(Ytrain3),"CategoricalPredictors",[2,4,5,6,7,8]);
pred3=predict(DecisionTree,Xtest3);
[accuracy3(1,i),precision3(1,i),recall3(1,i)]=meas(pred3,Ytest3);
err(1,i)=cvloss(DecisionTree);
%Naive Bayes Method
NBmodel=fitcnb(Xtrain3,categorical(Ytrain3),"CategoricalPredictors",[2,4,5,6,7,8]);
pred4=predict(NBmodel,Xtest3);
[accuracy4(1,i),precision4(1,i),recall4(1,i)]=meas(pred4,Ytest3);
%Multinomial Logistic Regression - Categorical Variables cannot be
%specified
[b5,dev5,stats5]=mnrfit(Xtrain3,categorical(Ytrain3));
[~,pred5]=max(mnrval(b5,Xtest3)');pred5=(pred5-1)';
[accuracy5(1,i),precision5(1,i),recall5(1,i)]=meas(categorical(pred5),Ytest3);
end
method3=[mean(accuracy1),mean(precision1),mean(recall1);...
mean(accuracy2),mean(precision2),mean(recall2);...
mean(accuracy3),mean(precision3),mean(recall3);...
mean(accuracy4),mean(precision4),mean(recall4);...
mean(accuracy5),mean(precision5),mean(recall5)]
method3 = 5×3
0.4882 0.5309 0.4574
0.4881 0.5362 0.4281
0.5211 0.4942 0.4394
0.4778 0.5432 0.4385
0.4882 0.5309 0.4574
avg_cvloss3=mean(err);
m1=array2table(method1,...
"RowNames",{'FSMethod1_glm','FSMethod1_TB','FSMethod1_DT','FSMethod1_NB','FSMethod1_MLR'},...
'VariableNames',{'Accuracy','Precision','Recall'})
m1 = 5×3 table
                         Accuracy    Precision    Recall
 1   FSMethod1_glm        0.5211      0.4832      0.3336
 2   FSMethod1_TB         0.4943      0.5272      0.4205
 3   FSMethod1_DT         0.4665      0.5521      0.4989
 4   FSMethod1_NB         0.5273      0.4744      0.3404
 5   FSMethod1_MLR        0.5211      0.4832      0.3336
m2=array2table(method2,...
"RowNames",{'FSMethod2_glm','FSMethod2_TB','FSMethod2_DT','FSMethod2_NB','FSMethod2_MLR'},...
'VariableNames',{'Accuracy','Precision','Recall'})
m2 = 5×3 table
                         Accuracy    Precision    Recall
 1   FSMethod2_glm        0.5108      0.4970      0.3712
 2   FSMethod2_TB         0.4923      0.5249      0.4614
 3   FSMethod2_DT         0.5026      0.5123      0.4430
 4   FSMethod2_NB         0.5119      0.4928      0.3566
 5   FSMethod2_MLR        0.5108      0.4970      0.3712
m3=array2table(method3,...
"RowNames",{'FSMethod3_glm','FSMethod3_TB','FSMethod3_DT','FSMethod3_NB','FSMethod3_MLR'},...
'VariableNames',{'Accuracy','Precision','Recall'})
m3 = 5×3 table
                         Accuracy    Precision    Recall
 1   FSMethod3_glm        0.4882      0.5309      0.4574
 2   FSMethod3_TB         0.4881      0.5362      0.4281
 3   FSMethod3_DT         0.5211      0.4942      0.4394
 4   FSMethod3_NB         0.4778      0.5432      0.4385
 5   FSMethod3_MLR        0.4882      0.5309      0.4574

Using Method 5.1 of feature selection

%Method 5.1
X4=X4(:,1:end-1)
X4 = 1385×11
0        1.0000   0.4867   0.3597   0.1433   0.5056   0.4719   0        0.5280   0.0772   0
1.0000   0        1.0000   0.5100   0.2724   0.9438   0.8315   0.9593   0.4482   0.1707   0.4166
1.0000   0        0.1303   0.6695   0.4384   0.1124   0.8652   0        0.5503   0        0.9103
1.0000   0        0.3841   0.8138   0.4005   0.2809   0.5506   0.5854   0.3744   0.1570   0.9209
0        1.0000   0.0735   0.6572   0.7094   0.7303   0.9101   0.6911   0.6147   1.0000   0.4193
1.0000   0        0.9653   0.0549   0.2864   0.7303   0.2921   0.8862   0.9044   0        0
0        1.0000   0.9472   0.7744   0.6313   0.2022   0.7640   0.6098   0.8604   0.0737   0.2654
0        1.0000   0.4768   0.4904   0.9229   0.8202   0.0674   0.3902   0.0600   0.2110   0.4584
0        1.0000   0.8221   0.6589   0.4187   0.4944   0.6517   0.2764   0.6302   0        0.4590
0        1.0000   0.4050   0.5315   0.0389   0.3258   0.4719   0.3089   0.1918   0.0716   0.3405
[Xtrain51, Ytrain51, Xtest51, Ytest51]=trainTestSplit(X4, Y,0.7);

5 fold cross validation

fold=5;
index1=crossvalind('kfold',size(Xtrain51,1),fold);
accuracy1=zeros(1,fold);
accuracy2=zeros(1,fold);
accuracy3=zeros(1,fold);
accuracy4=zeros(1,fold);
precision1=zeros(1,fold);
precision2=zeros(1,fold);
precision3=zeros(1,fold);
precision4=zeros(1,fold);
err=zeros(1,fold);
recall1=zeros(1,fold);
recall2=zeros(1,fold);
recall3=zeros(1,fold);
recall4=zeros(1,fold);
accuracy5=zeros(1,fold);
precision5=zeros(1,fold);
recall5=zeros(1,fold);
for i=1:fold
test=(index1==i);
train=~test;
Xtrain51=X4(train,:);
Xtest51=X4(test,:);
Ytrain51=Y(train,:);
Ytest51=Y(test,:);
%Logistic Regression
Logmodel=fitglm(Xtrain51,categorical(Ytrain51),'link','logit','Distribution',"binomial",'CategoricalVars',[1,2]);
pred1=predict(Logmodel,Xtest51);pred1(pred1<0.5)=0;pred1(pred1>=0.5)=1;
[accuracy1(1,i),precision1(1,i),recall1(1,i)]=meas(categorical(pred1),Ytest51);
%Random Forest (Classification)
RFmodel=TreeBagger(100,Xtrain51,categorical(Ytrain51),'MinLeafSize',10,'OOBPrediction',"on",'OOBPredictorImportance',"on",'CategoricalPredictors',[1,2]);
pred2=categorical(predict(RFmodel,Xtest51));
[accuracy2(1,i),precision2(1,i),recall2(1,i)]=meas(pred2,Ytest51);
%Decision Tree (Classification)
DecisionTree=fitctree(Xtrain51,categorical(Ytrain51),"CategoricalPredictors",[1,2]);
pred3=predict(DecisionTree,Xtest51);
[accuracy3(1,i),precision3(1,i),recall3(1,i)]=meas(pred3,Ytest51);
err(1,i)=cvloss(DecisionTree);
%Naive Bayes Method
NBmodel=fitcnb(Xtrain51,categorical(Ytrain51),"CategoricalPredictors",[1,2]);
pred4=predict(NBmodel,Xtest51);
[accuracy4(1,i),precision4(1,i),recall4(1,i)]=meas(pred4,Ytest51);
%Multinomial Logistic Regression - Categorical Variables cannot be
%specified
[b5,dev5,stats5]=mnrfit(Xtrain51,categorical(Ytrain51));
[~,pred5]=max(mnrval(b5,Xtest51)');pred5=(pred5-1)';
[accuracy5(1,i),precision5(1,i),recall5(1,i)]=meas(categorical(pred5),Ytest51);
end
avg_cvloss51=mean(err);
method51=[mean(accuracy1),mean(precision1),mean(recall1);...
mean(accuracy2),mean(precision2),mean(recall2);...
mean(accuracy3),mean(precision3),mean(recall3);...
mean(accuracy4),mean(precision4),mean(recall4);...
mean(accuracy5),mean(precision5),mean(recall5)]
method51 = 5×3
0.4954 0.5284 0.3922
0.4933 0.5296 0.4137
0.4902 0.5299 0.4781
0.5036 0.5181 0.3500
0.4954 0.5284 0.3922
m4=array2table(method51,...
"RowNames",{'FSMethod51_glm','FSMethod51_TB','FSMethod51_DT','FSMethod51_NB','FSMethod51_MLR'},...
'VariableNames',{'Accuracy','Precision','Recall'})
m4 = 5×3 table
                          Accuracy    Precision    Recall
 1   FSMethod51_glm        0.4954      0.5284      0.3922
 2   FSMethod51_TB         0.4933      0.5296      0.4137
 3   FSMethod51_DT         0.4902      0.5299      0.4781
 4   FSMethod51_NB         0.5036      0.5181      0.3500
 5   FSMethod51_MLR        0.4954      0.5284      0.3922

Using Method 5.2 of feature selection

%Method 5.2
X5=X5(:,1:end-1)
X5 = 1385×20
0.8276   1.0000   0        0        0        1.0000   1.0000   0.4867   0.3597   0.8000   0.1433   0.5056   0.1461   0.7865   0.4719   0        0.5456   0.5280   0.0772   0
0.4828   0.5385   1.0000   1.0000   0        1.0000   0        1.0000   0.5100   0        0.2724   0.9438   0.6292   0.4045   0.8315   0.9593   0.0338   0.4482   0.1707   0.4166
0.8621   0.8462   1.0000   1.0000   1.0000   0        0        0.1303   0.6695   0.4000   0.4384   0.1124   0.6292   0.7640   0.8652   0        0.4755   0.5503   0        0.9103
0.5862   0.8462   1.0000   0        1.0000   1.0000   0        0.3841   0.8138   0        0.4005   0.2809   0.7865   0.4607   0.5506   0.5854   0.8675   0.3744   0.1570   0.9209
0.9310   0.7692   0        1.0000   0        1.0000   1.0000   0.0735   0.6572   0.2000   0.7094   0.7303   0.3146   0.1011   0.9101   0.6911   0.5498   0.6147   1.0000   0.4193
0.8966   0        1.0000   1.0000   0        1.0000   0        0.9653   0.0549   1.0000   0.2864   0.7303   0.9213   0.6404   0.2921   0.8862   0.9637   0.9044   0        0
0.3448   0.3077   0        1.0000   1.0000   1.0000   1.0000   0.9472   0.7744   0.4000   0.6313   0.2022   0.8315   0.8876   0.7640   0.6098   0.2712   0.8604   0.0737   0.2654
0.5517   0.6154   0        1.0000   1.0000   0        1.0000   0.4768   0.4904   0.2000   0.9229   0.8202   0.4607   0.9888   0.0674   0.3902   0.5338   0.0600   0.2110   0.4584
0.4138   0.0769   0        1.0000   1.0000   0        1.0000   0.8221   0.6589   0.4000   0.4187   0.4944   0.1798   0.7079   0.6517   0.2764   0.4924   0.6302   0        0.4590
0.4483   0.6154   0        1.0000   1.0000   0        1.0000   0.4050   0.5315   0.4000   0.0389   0.3258   0.3708   0.9888   0.4719   0.3089   0.9585   0.1918   0.0716   0.3405
[Xtrain51, Ytrain51, Xtest51, Ytest51]=trainTestSplit(X5, Y,0.7);

5 fold cross validation

fold=5;
index1=crossvalind('kfold',size(Xtrain51,1),fold);
accuracy1=zeros(1,fold);
accuracy2=zeros(1,fold);
accuracy3=zeros(1,fold);
accuracy4=zeros(1,fold);
precision1=zeros(1,fold);
precision2=zeros(1,fold);
precision3=zeros(1,fold);
precision4=zeros(1,fold);
err=zeros(1,fold);
recall1=zeros(1,fold);
recall2=zeros(1,fold);
recall3=zeros(1,fold);
recall4=zeros(1,fold);
accuracy5=zeros(1,fold);
precision5=zeros(1,fold);
recall5=zeros(1,fold);
for i=1:fold
test=(index1==i);
train=~test;
Xtrain52=X5(train,:);
Xtest52=X5(test,:);
Ytrain52=Y(train,:);
Ytest52=Y(test,:);
%Logistic Regression
Logmodel=fitglm(Xtrain52,categorical(Ytrain52),'link','logit','Distribution',"binomial",'CategoricalVars',[3:7]);
pred1=predict(Logmodel,Xtest52);pred1(pred1<0.5)=0;pred1(pred1>=0.5)=1;
[accuracy1(1,i),precision1(1,i),recall1(1,i)]=meas(categorical(pred1),Ytest52);
%Random Forest (Classification)
RFmodel=TreeBagger(100,Xtrain52,categorical(Ytrain52),'MinLeafSize',10,'OOBPrediction',"on",'OOBPredictorImportance',"on",'CategoricalPredictors',[3:7]);
pred2=categorical(predict(RFmodel,Xtest52));
[accuracy2(1,i),precision2(1,i),recall2(1,i)]=meas(pred2,Ytest52);
%Decision Tree (Classification)
DecisionTree=fitctree(Xtrain52,categorical(Ytrain52),"CategoricalPredictors",[3:7]);
pred3=predict(DecisionTree,Xtest52);
[accuracy3(1,i),precision3(1,i),recall3(1,i)]=meas(pred3,Ytest52);
err(1,i)=cvloss(DecisionTree);
%Naive Bayes Method
NBmodel=fitcnb(Xtrain52,categorical(Ytrain52),"CategoricalPredictors",[3:7]);
pred4=predict(NBmodel,Xtest52);
[accuracy4(1,i),precision4(1,i),recall4(1,i)]=meas(pred4,Ytest52);
%Multinomial Logistic Regression - Categorical Variables cannot be
%specified
[b5,dev5,stats5]=mnrfit(Xtrain52,categorical(Ytrain52));
[~,pred5]=max(mnrval(b5,Xtest52)');pred5=(pred5-1)';
[accuracy5(1,i),precision5(1,i),recall5(1,i)]=meas(categorical(pred5),Ytest52);
end
avg_cvloss52=mean(err);
method52=[mean(accuracy1),mean(precision1),mean(recall1);...
mean(accuracy2),mean(precision2),mean(recall2);...
mean(accuracy3),mean(precision3),mean(recall3);...
mean(accuracy4),mean(precision4),mean(recall4);...
mean(accuracy5),mean(precision5),mean(recall5)]
method52 = 5×3
0.5015 0.5130 0.4205
0.5067 0.5095 0.4160
0.4994 0.5193 0.4915
0.4861 0.5344 0.4192
0.5015 0.5130 0.4205
m5=array2table(method52,...
"RowNames",{'FSMethod52_glm','FSMethod52_TB','FSMethod52_DT','FSMethod52_NB','FSMethod52_MLR'},...
'VariableNames',{'Accuracy','Precision','Recall'})
m5 = 5×3 table
                          Accuracy    Precision    Recall
 1   FSMethod52_glm        0.5015      0.5130      0.4205
 2   FSMethod52_TB         0.5067      0.5095      0.4160
 3   FSMethod52_DT         0.4994      0.5193      0.4915
 4   FSMethod52_NB         0.4861      0.5344      0.4192
 5   FSMethod52_MLR        0.5015      0.5130      0.4205

Lasso Regression

[b_lasso, fit_lasso]=lasso(Xtrain1,Ytrain1,"CV",5);
pred6=round(Xtest1*b_lasso(:,1) + fit_lasso.Intercept(fit_lasso.IndexMinMSE));
[accuracy6,precision6,recall6]=meas(categorical(pred6),Ytest1);
method1(6,:)=[accuracy6,precision6,recall6]
method1 = 6×3
0.5211 0.4832 0.3336
0.4943 0.5272 0.4205
0.4665 0.5521 0.4989
0.5273 0.4744 0.3404
0.5211 0.4832 0.3336
0.5130 0.4932 0.3673
[b_lasso, fit_lasso]=lasso(Xtrain2,Ytrain2,"CV",5);
pred6=round(Xtest2*b_lasso(:,1) + fit_lasso.Intercept(fit_lasso.IndexMinMSE));
[accuracy6,precision6,recall6]=meas(categorical(pred6),Ytest2);
method2(6,:)=[accuracy6,precision6,recall6]
method2 = 6×3
0.5108 0.4970 0.3712
0.4923 0.5249 0.4614
0.5026 0.5123 0.4430
0.5119 0.4928 0.3566
0.5108 0.4970 0.3712
0.5258 0.5476 0.4182
[b_lasso, fit_lasso]=lasso(Xtrain3,Ytrain3,"CV",5);
pred6=round(Xtest3*b_lasso(:,1) + fit_lasso.Intercept(fit_lasso.IndexMinMSE));
[accuracy6,precision6,recall6]=meas(categorical(pred6),Ytest3);
method3(6,:)=[accuracy6,precision6,recall6]
method3 = 6×3
0.4882 0.5309 0.4574
0.4881 0.5362 0.4281
0.5211 0.4942 0.4394
0.4778 0.5432 0.4385
0.4882 0.5309 0.4574
0.5206 0.5098 0.2549
[b_lasso, fit_lasso]=lasso(Xtrain51,Ytrain51,"CV",5);
pred6=round(Xtest51*b_lasso(:,1) + fit_lasso.Intercept(fit_lasso.IndexMinMSE));
[accuracy6,precision6,recall6]=meas(categorical(pred6),Ytest51);
method51(6,:)=[accuracy6,precision6,recall6]
method51 = 6×3
0.4954 0.5284 0.3922
0.4933 0.5296 0.4137
0.4902 0.5299 0.4781
0.5036 0.5181 0.3500
0.4954 0.5284 0.3922
0.4880 0.5368 0.8565
[b_lasso, fit_lasso]=lasso(Xtrain52,Ytrain52,"CV",5);
pred6=round(Xtest52*b_lasso(:,1) + fit_lasso.Intercept(fit_lasso.IndexMinMSE));
[accuracy6,precision6,recall6]=meas(categorical(pred6),Ytest52);
method52(6,:)=[accuracy6,precision6,recall6]
method52 = 6×3
0.5015 0.5130 0.4205
0.5067 0.5095 0.4160
0.4994 0.5193 0.4915
0.4861 0.5344 0.4192
0.5015 0.5130 0.4205
0.4715 0.5773 0.5283
classifierused={'Logistic Regression','Tree Bagger','Decision Tree','Naive Bayes','Multinomial Regression','Lasso Regression'};
for i=1:6
classifierused(i)
array2table([method1(i,:);method2(i,:);method3(i,:);method51(i,:);method52(i,:)],...
'VariableNames',{'Accuracy','Precision','Recall'},'Rownames',...
{'TTest based','Step based','Reduction based','PCA(PC1-2) based','PCA(PC1-4) based'})
end
ans = 1×1 cell array
{'Logistic Regression'}
ans = 5×3 table
                            Accuracy    Precision    Recall
 1   TTest based             0.5211      0.4832      0.3336
 2   Step based              0.5108      0.4970      0.3712
 3   Reduction based         0.4882      0.5309      0.4574
 4   PCA(PC1-2) based        0.4954      0.5284      0.3922
 5   PCA(PC1-4) based        0.5015      0.5130      0.4205
ans = 1×1 cell array
{'Tree Bagger'}
ans = 5×3 table
                            Accuracy    Precision    Recall
 1   TTest based             0.4943      0.5272      0.4205
 2   Step based              0.4923      0.5249      0.4614
 3   Reduction based         0.4881      0.5362      0.4281
 4   PCA(PC1-2) based        0.4933      0.5296      0.4137
 5   PCA(PC1-4) based        0.5067      0.5095      0.4160
ans = 1×1 cell array
{'Decision Tree'}
ans = 5×3 table
                            Accuracy    Precision    Recall
 1   TTest based             0.4665      0.5521      0.4989
 2   Step based              0.5026      0.5123      0.4430
 3   Reduction based         0.5211      0.4942      0.4394
 4   PCA(PC1-2) based        0.4902      0.5299      0.4781
 5   PCA(PC1-4) based        0.4994      0.5193      0.4915
ans = 1×1 cell array
{'Naive Bayes'}
ans = 5×3 table
                            Accuracy    Precision    Recall
 1   TTest based             0.5273      0.4744      0.3404
 2   Step based              0.5119      0.4928      0.3566
 3   Reduction based         0.4778      0.5432      0.4385
 4   PCA(PC1-2) based        0.5036      0.5181      0.3500
 5   PCA(PC1-4) based        0.4861      0.5344      0.4192
ans = 1×1 cell array
{'Multinomial Regression'}
ans = 5×3 table
                            Accuracy    Precision    Recall
 1   TTest based             0.5211      0.4832      0.3336
 2   Step based              0.5108      0.4970      0.3712
 3   Reduction based         0.4882      0.5309      0.4574
 4   PCA(PC1-2) based        0.4954      0.5284      0.3922
 5   PCA(PC1-4) based        0.5015      0.5130      0.4205
ans = 1×1 cell array
{'Lasso Regression'}
ans = 5×3 table
                            Accuracy    Precision    Recall
 1   TTest based             0.5130      0.4932      0.3673
 2   Step based              0.5258      0.5476      0.4182
 3   Reduction based         0.5206      0.5098      0.2549
 4   PCA(PC1-2) based        0.4880      0.5368      0.8565
 5   PCA(PC1-4) based        0.4715      0.5773      0.5283
for i=1:6 % ML
res(i,:)=[method1(i,:),method2(i,:),method3(i,:),method51(i,:),method52(i,:)];
end
array2table(res','VariableNames',classifierused,'RowNames',...
{'TTest based Accuracy','TTest based Precision','TTest based Recall',...
'Step based Accuracy','Step based Precision','Step based Recall',....
'Reduction based Accuracy','Reduction based Precision','Reduction based Recall',...
'PCA(PC1-2) based Accuracy','PCA(PC1-2) based Precision','PCA(PC1-2) based Recall',...
'PCA(PC1-4) based Accuracy','PCA(PC1-4) based Precision','PCA(PC1-4) based Recall'})
ans = 15×6 table
                                     Logistic Regression    Tree Bagger    Decision Tree    Naive Bayes    Multinomial Regression    Lasso Regression
 1   TTest based Accuracy                  0.5211              0.4943         0.4665           0.5273              0.5211                 0.5130
 2   TTest based Precision                 0.4832              0.5272         0.5521           0.4744              0.4832                 0.4932
 3   TTest based Recall                    0.3336              0.4205         0.4989           0.3404              0.3336                 0.3673
 4   Step based Accuracy                   0.5108              0.4923         0.5026           0.5119              0.5108                 0.5258
 5   Step based Precision                  0.4970              0.5249         0.5123           0.4928              0.4970                 0.5476
 6   Step based Recall                     0.3712              0.4614         0.4430           0.3566              0.3712                 0.4182
 7   Reduction based Accuracy              0.4882              0.4881         0.5211           0.4778              0.4882                 0.5206
 8   Reduction based Precision             0.5309              0.5362         0.4942           0.5432              0.5309                 0.5098
 9   Reduction based Recall                0.4574              0.4281         0.4394           0.4385              0.4574                 0.2549
10   PCA(PC1-2) based Accuracy             0.4954              0.4933         0.4902           0.5036              0.4954                 0.4880
11   PCA(PC1-2) based Precision            0.5284              0.5296         0.5299           0.5181              0.5284                 0.5368
12   PCA(PC1-2) based Recall               0.3922              0.4137         0.4781           0.3500              0.3922                 0.8565
13   PCA(PC1-4) based Accuracy             0.5015              0.5067         0.4994           0.4861              0.5015                 0.4715
14   PCA(PC1-4) based Precision            0.5130              0.5095         0.5193           0.5344              0.5130                 0.5773
15   PCA(PC1-4) based Recall               0.4205              0.4160         0.4915           0.4192              0.4205                 0.5283

UNSUPERVISED LEARNING

K-means Clustering

Here, we set k = 2 because the goal of the study is to detect advanced fibrosis, so the expected answer is binary (yes/no).
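The choice k = 2 can also be sanity-checked with evalclusters, which scores candidate numbers of clusters by their average silhouette value; a minimal sketch on the rescaled matrix X used below (illustrative, not part of the original analysis):
% Compare k = 2..6 for k-means using the silhouette criterion
eva = evalclusters(X, 'kmeans', 'silhouette', 'KList', 2:6);
disp(eva.OptimalK)   % number of clusters suggested by the silhouette criterion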
K-medoids on Binary data only
x1=X(:,[2,4:10,28]);
[idx,c]=kmedoids(x1,2,'Distance',"hamming");
figure()
silhouette(x1,idx)
title('K-medoids on Binary Data only')
figure()
% Visualize the clustering on two selected variables
plot(x1(idx==1,5),x1(idx==1,8),'r.','MarkerSize',12)
hold on
plot(x1(idx==2,5),x1(idx==2,8),'b.','MarkerSize',12)
plot(c(:,5),c(:,8),'kx',...
'MarkerSize',15,'LineWidth',3)
legend('Cluster 1','Cluster 2','Centroids',...
'Location','NW')
title 'Cluster Assignments and Centroids'
hold off
outp=Y+1;
[ac,pr,re]=meas(categorical(idx),outp);
m1=[ac,pr,re];
confusionmat(double(idx),double(outp))
Class list in given sample: 1 2    Total instances = 1385    (class1 ==> 1, class2 ==> 2)
Confusion Matrix:
                      predict_class1    predict_class2
    Actual_class1          398               441
    Actual_class2          270               276
Two-class confusion matrix:
                        TruePositive      FalsePositive
    FalseNegative           398                441
    TrueNegative=TN         270                276
Overall values:
    Accuracy: 0.4866    Error: 0.5134    Sensitivity: 0.4744    Specificity: 0.5055
    Precision: 0.5958    FalsePositiveRate: 0.4945    F1_score: 0.5282
    MatthewsCorrelationCoefficient: 0.0197    Kappa: 0.0187
ans = 2×2
398 441
270 276
K-means on continuous data only
x1=X(:,[1,3,11:27]);
[idx,c]=kmeans(x1,2);
silhouette(x1,idx)
title('K-means on continuous data only')
figure;
% Visualize the clustering on two selected variables
plot(x1(idx==1,5),x1(idx==1,8),'r.','MarkerSize',12)
hold on
plot(x1(idx==2,5),x1(idx==2,8),'b.','MarkerSize',12)
plot(c(:,5),c(:,8),'kx',...
'MarkerSize',15,'LineWidth',3)
legend('Cluster 1','Cluster 2','Centroids',...
'Location','NW')
title 'Cluster Assignments and Centroids'
hold off
outp=Y+1;
[ac,pr,re]=meas(categorical(idx),outp);
m2=[ac,pr,re];
confusionmat(double(idx),double(outp))
Class list in given sample: 1 2    Total instances = 1385    (class1 ==> 1, class2 ==> 2)
Confusion Matrix:
                      predict_class1    predict_class2
    Actual_class1          366               411
    Actual_class2          302               306
Two-class confusion matrix:
                        TruePositive      FalsePositive
    FalseNegative           366                411
    TrueNegative=TN         302                306
Overall values:
    Accuracy: 0.4852    Error: 0.5148    Sensitivity: 0.4710    Specificity: 0.5033
    Precision: 0.5479    FalsePositiveRate: 0.4967    F1_score: 0.5066
    MatthewsCorrelationCoefficient: 0.0255    Kappa: 0.0246
ans = 2×2
366 411
302 306
K-means on entire data
[idx,c]=kmeans(X,2);
silhouette(X,idx)
title('K-means on entire data')
figure;
% Visualize the clustering on two selected variables
plot(X(idx==1,5),X(idx==1,27),'r.','MarkerSize',12)
hold on
plot(X(idx==2,5),X(idx==2,27),'b.','MarkerSize',12)
plot(c(:,5),c(:,27),'kx',...
'MarkerSize',15,'LineWidth',3)
legend('Cluster 1','Cluster 2','Centroids',...
'Location','NW')
title 'Cluster Assignments and Centroids'
hold off
outp=Y+1;
[ac,pr,re]=meas(categorical(idx),outp);
m3=[ac,pr,re];
confusionmat(double(idx),double(outp))
Class list in given sample: 1 2    Total instances = 1385    (class1 ==> 1, class2 ==> 2)
Confusion Matrix:
                      predict_class1    predict_class2
    Actual_class1          347               347
    Actual_class2          321               370
Two-class confusion matrix:
                        TruePositive      FalsePositive
    FalseNegative           347                347
    TrueNegative=TN         321                370
Overall values:
    Accuracy: 0.5177    Error: 0.4823    Sensitivity: 0.5000    Specificity: 0.5355
    Precision: 0.5195    FalsePositiveRate: 0.4645    F1_score: 0.5095
    MatthewsCorrelationCoefficient: 0.0355    Kappa: 0.0355
ans = 2×2
347 347
321 370
K-medoids on entire data
[idx,c]=kmedoids(X,2);
silhouette(X,idx)
title('K-medoids on entire data')
figure;
% Visualize the clustering on two selected variables
plot(X(idx==1,5),X(idx==1,27),'r.','MarkerSize',12)
hold on
plot(X(idx==2,5),X(idx==2,27),'b.','MarkerSize',12)
plot(c(:,5),c(:,27),'kx',...
'MarkerSize',15,'LineWidth',3)
legend('Cluster 1','Cluster 2','Centroids',...
'Location','NW')
title 'Cluster Assignments and Centroids'
hold off
outp=Y+1;
[ac,pr,re]=meas(categorical(idx),outp);
m4=[ac,pr,re];
confusionmat(double(idx),double(outp))
Class list in given sample: 1 2    Total instances = 1385    (class1 ==> 1, class2 ==> 2)
Confusion Matrix:
                      predict_class1    predict_class2
    Actual_class1          418               400
    Actual_class2          250               317
Two-class confusion matrix:
                        TruePositive      FalsePositive
    FalseNegative           418                400
    TrueNegative=TN         250                317
Overall values:
    Accuracy: 0.5307    Error: 0.4693    Sensitivity: 0.5110    Specificity: 0.5591
    Precision: 0.6257    FalsePositiveRate: 0.4409    F1_score: 0.5626
    MatthewsCorrelationCoefficient: 0.0690    Kappa: 0.0674
ans = 2×2
418 400
250 317
array2table([m1;m2;m3;m4],'RowNames',{'K-medoids_Binary','K-means_Continuous','K-means_Entire','K-medoids_Entire'},...
'VariableNames',{'Accuracy','Precision','Recall'})
ans = 4×3 table
                              Accuracy    Precision    Recall
 1   K-medoids_Binary          0.4866      0.4744      0.5958
 2   K-means_Continuous        0.4852      0.4710      0.5479
 3   K-means_Entire            0.5177      0.5000      0.5195
 4   K-medoids_Entire          0.5307      0.5110      0.6257

Compare FS Method models with Random Guess

% compare with 250 random predictions
for i = 1:250
random_pred = Ytrain1(randperm(length(Ytest1)));
accuracy_guess(i,1) = sum(random_pred == Ytest1)/length(Ytest1);
precision_guess(i,1) = sum(random_pred==1 & Ytest1==1)/sum(random_pred==1);
recall_guess(i,1) = sum(random_pred==1 & Ytest1==1)/sum(Ytest1==1);
end
histogram(accuracy_guess)
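The spread of the random baseline can also be summarised directly before running the t-tests; a minimal sketch using the vectors computed above (illustrative, not part of the original run):
% Mean and standard deviation of the random-guess metrics over the 250 repetitions
fprintf('Random guess accuracy : %.3f +/- %.3f\n', mean(accuracy_guess), std(accuracy_guess));
fprintf('Random guess precision: %.3f +/- %.3f\n', mean(precision_guess), std(precision_guess));
fprintf('Random guess recall   : %.3f +/- %.3f\n', mean(recall_guess), std(recall_guess));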
% Naive Bayes with TTest-based feature selection gives the maximum accuracy
% among the supervised learning models
[h_acc, p_acc] = ttest2(method1(4,1), accuracy_guess)
h_acc = 0
p_acc = 0.5226
[h_pre, p_pre] = ttest2(method1(4,2), precision_guess)
h_pre = 0
p_pre = 0.3134
[h_rec, p_rec] = ttest2(method1(4,3), recall_guess)
h_rec = 1
p_rec = 3.1421e-07
% K-medoids on the entire data performed best among the clustering approaches
[h_acc, p_acc] = ttest2(m4(1,1), accuracy_guess)
h_acc = 0
p_acc = 0.4669
[h_pre, p_pre] = ttest2(m4(1,2), precision_guess)
h_pre = 0
p_pre = 0.9823
[h_rec, p_rec] = ttest2(m4(1,3), recall_guess)
h_rec = 1
p_rec = 0.0170
function [acc,prec,rec]=meas(p,y)
% Accuracy, precision and recall of categorical predictions p against labels y
acc=sum(p==categorical(y))/length(y);   % fraction of exact label matches
p=double(p);                            % numeric coding of the predicted categories
prec=sum(p==1 & y==1)/sum(p==1);        % precision with respect to class 1
rec=sum(p==1 & y==1)/sum(y==1);         % recall with respect to class 1
end